This project analyzes the stock market and discusses which strategies best optimize the value of our portfolio. In the future, I will use machine learning techniques to build a model that optimizes the portfolio value. Currently, the project is in an exploratory stage in which I conduct a general analysis of each stock.
import pandas as pd
import yfinance as yf
import time
import matplotlib.pyplot as plt
import numpy as np
import technical_indicators as ti
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from pypfopt import EfficientFrontier
from pypfopt import risk_models
from pypfopt import expected_returns
# common methods to get daily return and to normalize the data using pandas
def get_daily_return(data):
    """Return the row-over-row simple return of ``data``.

    Every value is divided by the value one row earlier and 1 is subtracted.
    The first row has no predecessor, so it comes back as NaN.
    """
    previous = data.shift(1)
    return data.div(previous) - 1
def normalize(data):
    """Rescale ``data`` so that every series starts at 1.

    Each column (or the single Series) is divided by its first non-missing
    value. Using the first *valid* value instead of blindly taking row 0
    keeps a column usable when its history starts later than the others;
    dividing by a NaN first row would turn the whole column into NaN.
    Rows before a column's first valid value stay NaN.

    Raises:
        IndexError: if ``data`` has no rows.
    """
    # bfill lifts each column's first valid observation up to row 0.
    base = data.bfill().iloc[0]
    return data / base
# Download three years of daily prices for the benchmark (SPY) and the
# candidate symbols. auto_adjust=False is passed explicitly: yfinance releases
# that auto-adjust by default drop the 'Adj Close' column selected below,
# which would make stocks['Adj Close'] raise KeyError.
stocks = yf.download(
    'SPY NOK V GLD DIS SBUX AMD MSFT BA TMUS CVX',
    period='3y',  # 1y, 5y
    auto_adjust=False,
)
# Optional gap filling, only needed when a symbol has missing history:
#stocks = stocks.ffill().bfill()
close = stocks['Adj Close']
# SPY is kept apart from the stocks being analyzed.
spy = close['SPY']
interested_stocks = close.drop(columns=['SPY'])
Note that stocks = stocks.ffill().bfill() would be needed if we were looking far enough back in time that a particular stock no longer exists or had not yet been listed during part of that period.
# Daily simple returns. The first row is dropped because it has no previous
# row to compare against and is therefore NaN.
dailyReturn = get_daily_return(interested_stocks).iloc[1:, :]
dailyReturnSPY = get_daily_return(spy).iloc[1:]
# Prices rescaled relative to the first row of the window.
normalizedStocks = normalize(interested_stocks)
# Interactive overview of every stock's normalized price path.
# normalizedStocks already holds normalize(interested_stocks), so it is
# reused here instead of being recomputed.
fig1 = px.line(
    normalizedStocks,
    labels={"value": "Cumulative Return", 'variable': 'Symbol'},
    title='Cumulative Return Overview',
)
# Range slider plus quick-zoom buttons on the x axis.
fig1.update_xaxes(
    rangeslider_visible=True,
    rangeselector=dict(
        buttons=[
            dict(count=1, label="1m", step="month", stepmode="backward"),
            dict(count=6, label="6m", step="month", stepmode="backward"),
            dict(count=1, label="YTD", step="year", stepmode="todate"),
            dict(count=1, label="1y", step="year", stepmode="backward"),
            dict(step="all"),
        ]
    ),
)
# Show explicitly, like the other figures in this file, so the chart also
# appears when this code runs outside a notebook cell.
fig1.show()
We want to see how the daily returns of the selected stocks are correlated with one another.
# Pairwise correlation of the stocks' daily returns.
correlations = dailyReturn.corr()
# Show explicitly, like the other figures in this file; a bare px.imshow(...)
# expression is only displayed when it is the last statement of a notebook cell.
px.imshow(correlations, title='Correlation Heatmap').show()
We can see which stocks are strongly and positively correlated with each other. For example, Microsoft and Visa have a strong positive correlation. On the other hand, GLD has a weak or no correlation with the other stocks.
Note that a correlation of 1 indicates that when variable A increases, variable B also increases, while a correlation of -1 indicates that when A increases, B decreases.
# Per-symbol reward (mean daily return) and risk (standard deviation of
# daily returns), plotted against each other with one labelled point per symbol.
means = dailyReturn.mean().rename('mean')
stds = dailyReturn.std().rename('std')
fig = px.scatter(
    x=stds,
    y=means,
    text=means.index,
    color=means.index,
    labels={"x": "Risk", 'y': 'Return'},
)
# Inputs for the optimizer, both computed from the price table with
# frequency=252 (presumably trading days per year - confirm against PyPortfolioOpt).
S = risk_models.sample_cov(interested_stocks, frequency=252)
mu = expected_returns.mean_historical_return(interested_stocks, frequency=252)
# Solve for the maximum-Sharpe allocation and keep the cleaned weights as a
# Series keyed by symbol.
ef = EfficientFrontier(mu, S)
ef.max_sharpe()
w = pd.Series(ef.clean_weights())
# Place the max-Sharpe portfolio on the daily risk/return scatter.
# Its return is the weighted mean of the daily returns. Its risk has to come
# from the full covariance matrix, sqrt(w' * Cov * w): a weighted sum of the
# individual standard deviations ignores the correlations between the assets.
daily_cov = dailyReturn.cov().loc[w.index, w.index]
portfolio_risk = float(np.sqrt(w.values @ daily_cov.values @ w.values))
portfolio_return = (means * w).sum()
fig.add_trace(
    go.Scatter(
        x=[portfolio_risk],
        y=[portfolio_return],
        mode='markers',
        marker_symbol='x',
        marker_size=15,
        marker_color='blue',
        text=['Best Sharpe Ratio'],
    )
)
fig.update_traces(textposition='top center')
fig.update_layout(showlegend=False)
fig.show()
# Report the chosen allocation, then let the optimizer print its own
# performance summary (verbose=True).
print('Best weight allocation:', w, sep='\n')
ef.portfolio_performance(verbose=True)
The Sharpe ratio helps investors assess the return of an investment relative to its risk. For the stocks I selected, a combination of AMD, GLD, and MSFT generates the highest Sharpe ratio.
Looking at the weights, we can see that roughly 65% comes from GLD, 25% from MSFT, and 10% from AMD. This makes sense because GLD is considered a safe stock, where the return is small but the risk is also minimal. MSFT is considered a moderately risky stock, but it generates a higher return than GLD. AMD, lastly, is a very risky stock, as its volatility is high. A 65%/25%/10% split is a reasonable strategy for an average investor who wants an "okay" return with minimal risk.
We will continue to investigate these three stocks below.
# Narrow the price table to the three symbols singled out for a closer look.
bestStocks = interested_stocks.loc[:, ['GLD', 'MSFT', 'AMD']]
# MACD comes from the project's technical_indicators module; 12 and 26 are
# presumably the fast/slow EMA spans - confirm against ti.macd.
macd = ti.macd(bestStocks, 12, 26)
# Signal line: 9-span exponentially weighted mean of the MACD series.
signal_line = macd.ewm(
    span=9,
    adjust=False,
    ignore_na=False,
).mean()
def plot_MACD(symbol, data, macd, signal_line):
    """Show a two-row figure: the price of ``symbol`` above its MACD and signal line.

    Args:
        symbol: Column label to plot; looked up in ``data``, ``macd`` and
            ``signal_line``.
        data: Price table with one column per symbol; its index is used as
            the x axis (assumed to be dates, given the month/year range
            buttons - confirm with caller).
        macd: MACD values, one column per symbol.
        signal_line: Signal-line values, one column per symbol.

    Raises:
        KeyError: if ``symbol`` is missing from any of the three tables.
    """
    price_sym = data[symbol]
    macd_sym = macd[symbol]
    signal_sym = signal_line[symbol]

    fig = make_subplots(
        rows=2,
        cols=1,
        shared_xaxes=True,
        vertical_spacing=0.05,
        subplot_titles=("Stock Price", "MACD & Signal Line"),
    )

    # add_trace(row=, col=) replaces the deprecated append_trace and matches
    # how traces are added elsewhere in this file.
    fig.add_trace(
        go.Scatter(x=price_sym.index, y=price_sym.values, name=symbol + " Price"),
        row=1, col=1,
    )
    fig.add_trace(
        go.Scatter(x=macd_sym.index, y=macd_sym.values, name='MACD'),
        row=2, col=1,
    )
    fig.add_trace(
        go.Scatter(x=signal_sym.index, y=signal_sym.values, name='Signal Line'),
        row=2, col=1,
    )

    fig.update_layout(height=750, width=950, title_text="MACD on " + symbol)
    # Range slider under the bottom panel, quick-zoom buttons above the top one.
    fig.update_xaxes(row=2, col=1, rangeslider_visible=True)
    fig.update_xaxes(
        row=1, col=1,
        rangeselector=dict(
            buttons=[
                dict(count=1, label="1m", step="month", stepmode="backward"),
                dict(count=6, label="6m", step="month", stepmode="backward"),
                dict(count=1, label="YTD", step="year", stepmode="todate"),
                dict(count=1, label="1y", step="year", stepmode="backward"),
                dict(step="all"),
            ]
        ),
    )
    fig.show()
# Render the MACD view for MSFT.
plot_MACD(symbol='MSFT', data=bestStocks, macd=macd, signal_line=signal_line)
# RSI for the selected symbols from the project's technical_indicators
# module, using a look-back period of 14.
rsi = ti.rsi(bestStocks, look_back_period=14)
def plot_RSI(symbol, data, rsi):
    """Show a two-row figure: the price of ``symbol`` above its RSI.

    A translucent green band between RSI 30 and 70 is drawn across the whole
    x range of the RSI panel.

    Args:
        symbol: Column label to plot; looked up in ``data`` and ``rsi``.
        data: Price table with one column per symbol; its index is used as
            the x axis and for the horizontal extent of the band.
        rsi: RSI values, one column per symbol.

    Raises:
        KeyError: if ``symbol`` is missing from ``data`` or ``rsi``.
    """
    price_sym = data[symbol]
    rsi_sym = rsi[symbol]

    fig = make_subplots(
        rows=2,
        cols=1,
        shared_xaxes=True,
        vertical_spacing=0.05,
        subplot_titles=("Stock Price", "RSI"),
    )

    # add_trace(row=, col=) replaces the deprecated append_trace and matches
    # how traces are added elsewhere in this file.
    fig.add_trace(
        go.Scatter(x=price_sym.index, y=price_sym.values, name=symbol + ' Price'),
        row=1, col=1,
    )
    fig.add_trace(
        go.Scatter(x=rsi_sym.index, y=rsi_sym.values, name='RSI'),
        row=2, col=1,
    )

    # The band spans the index of the ``data`` argument. Reading the
    # module-level bestStocks here would tie the function to one specific
    # table and misplace the band for any other input.
    fig.add_shape(
        row=2, col=1, type='rect',
        x0=data.index[0], y0=70,
        x1=data.index[-1], y1=30,
        fillcolor='green',
        opacity=0.1,
    )

    fig.update_layout(height=750, width=950, title_text="RSI on " + symbol)
    # Range slider under the bottom panel, quick-zoom buttons above the top one.
    fig.update_xaxes(row=2, col=1, rangeslider_visible=True)
    fig.update_xaxes(
        row=1, col=1,
        rangeselector=dict(
            buttons=[
                dict(count=1, label="1m", step="month", stepmode="backward"),
                dict(count=6, label="6m", step="month", stepmode="backward"),
                dict(count=1, label="YTD", step="year", stepmode="todate"),
                dict(count=1, label="1y", step="year", stepmode="backward"),
                dict(step="all"),
            ]
        ),
    )
    fig.show()
# Render the RSI view for GLD.
plot_RSI(symbol='GLD', data=bestStocks, rsi=rsi)